This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(gapminder)
library(purrr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
mean_func <- function(x) {
sum(x) / length(x)
}
median_func <- function(x) {
n <- length(x)
x_sorted <- sort(x)
if(n %% 2 == 0) {
(x_sorted[n/2] + x_sorted[(n/2) + 1]) / 2
} else {
x_sorted[(n+1)/2]
}
}
var_func <- function(x) {
n <- length(x)
x_mean <- mean_func(x)
sum((x - x_mean)^2) / (n - 1)
}
skewness_func <- function(x) {
n <- length(x)
x_mean <- mean_func(x)
x_sd <- sqrt(var_func(x))
x_median <- median_func(x)
#3(x_mean-x_median) / x_sd
sum((x - x_mean)^3) / (n * x_sd^3)
}
# Simulate 1000 observations from the standard normal distribution
set.seed(123)
x1 <- rnorm(1000)
# Simulate 1000 observations from the beta distribution with parameters 6 and 2
x2 <- rbeta(1000, 6, 2)
# Simulate 1000 observations from the chi-square distribution with 5 degrees of freedom
x3 <- rchisq(1000, 5)
# Compute the mean, median, variance, and skewness for each distribution
mean_vec <- c(mean_func(x1), mean_func(x2), mean_func(x3))
median_vec <- c(median_func(x1), median_func(x2), median_func(x3))
variance_vec <- c(var_func(x1), var_func(x2), var_func(x3))
skewness_vec <- c(skewness_func(x1), skewness_func(x2), skewness_func(x3))
# Create a table of the results
results <- data.frame(Distribution = c("Standard Normal", "Beta (6,2)", "Chi-Square (df=5)"),
Mean = round(mean_vec, 3),
Median = round(median_vec, 3),
Variance = round(variance_vec, 3),
Skewness = round(skewness_vec, 3))
print(results)
## Distribution Mean Median Variance Skewness
## 1 Standard Normal 0.016 0.009 0.983 0.065
## 2 Beta (6,2) 0.739 0.763 0.023 -0.668
## 3 Chi-Square (df=5) 4.925 4.209 9.074 1.201
par(mfrow = c(1, 3))
#Density plot for standard normal
plot(density(x1), main = "Standard Normal")
abline(v = mean_func(x1), col = "red")
abline(v = median_func(x1), col = "blue", lty = 2)
#Density plot for beta
plot(density(x2), main = "Beta(6, 2)")
abline(v = mean_func(x2), col = "red")
abline(v = median_func(x2), col = "blue", lty = 2)
#Density plot for chi square
plot(density(x3), main = "Chi-square(5)")
abline(v = mean_func(x3), col = "red")
abline(v = median_func(x3), col = "blue", lty = 2)
lm object,
returns the top n residuals arranged in descending order according to
their largest absolute values (but returns the residuals, not the
absolute value of the residuals), where the default value for n is 5.
The function should give a clear error message if n is larger than the
number of residuals. Demonstrate that your function works by applying it
to mtcars.lm <- lm(mpg ~ disp, data = mtcars) first with no argument
for n, then with n = 6, and then with n = 40 (error message
expected).top_residuals <- function(lm_object, n = 5) {
residuals <- lm_object$residuals
n_resid <- length(residuals)
if (n > n_resid) {
stop("n is larger than the number of residuals.")
} else {
sorted_resid <- residuals[order(abs(residuals), decreasing = TRUE)]
return(sorted_resid[1:n])
}
}
mtcars.lm <- lm(mpg ~ disp, data = mtcars)
# top 5 residuals (default)
top_residuals(mtcars.lm)
## Toyota Corolla Pontiac Firebird Fiat 128 Merc 280C
## 7.230540 6.086193 6.043775 -4.892201
## Lotus Europa
## 4.719703
# top 6 residuals
top_residuals(mtcars.lm, n = 6)
## Toyota Corolla Pontiac Firebird Fiat 128 Merc 280C
## 7.230540 6.086193 6.043775 -4.892201
## Lotus Europa Hornet Sportabout
## 4.719703 3.937588
#top_residuals(mtcars.lm, n = 40)
# output : Error in top_residuals(mtcars.lm, n = 40) :
# n is larger than the number of residuals.
rsq_by_country <- gapminder %>% split(.$country) %>%
map(~ lm(lifeExp ~ log10(gdpPercap), data = .)) %>%
map_dbl(~ summary(.x)$r.squared)%>%
tibble(country = names(.), rsq = .)
rsq_df <- left_join(rsq_by_country, gapminder %>% select(country, continent), by = "country")
plot <- ggplot(rsq_df, aes(x = continent, y = rsq,fill=continent)) +
geom_boxplot() +
ylab("R-squared") +
ggtitle("R-squared of lifeExp ~ log10(gdpPercap) by continent")
ggplotly()
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.